##################################################################################################################
# Code for Results in Appendix Section A5.8
##################################################################################################################
write.table(data.frame(Description=c("","","","Appendix Section A5.8: Prevalence of Contract Lobbyists in Federal and State Lobbying")),row.names=F,col.names=F,quote=F)

##################################################################################################################
# To run, please set the working directory to match the location of the folder containing all replication files.
##################################################################################################################
#setwd("Replication_PSRM_ST24")

#if not yet created, create "output" folder
if("output"%in%list.files()==F){dir.create("output")}

#clear environment
rm(list=ls());gc();gc();gc();gc()

#if not yet installed, install packages
if(length(find.package("dplyr",quiet=T))==0){install.packages("dplyr")}
if(length(find.package("knitr",quiet=T))==0){install.packages("knitr")} 
if(as.numeric(available.packages()["knitr","Version"])<1.48){install.packages("knitr")} #updated version required to prevent error with kbl(x,format="latex)
if(length(find.package("kableExtra",quiet=T))==0){install.packages("kableExtra")}

#Load packages
library(dplyr)
library(kableExtra)

#Set POSIX locale
Sys.setlocale(locale="C")

#LDA data files from OpenSecrets downloaded from https://www.opensecrets.org/bulk-data 
#The following requires files lob_lobbyist.txt and lob_lobbying.txt to be unzipped

#unzip
unzip("Data/Appendix_A1_2_and_A5_8/lobby_lda.zip",exdir="Data/Appendix_A1_2_and_A5_8") #N.B. requires approx. 725 MB

#load file
t8<-readLines("Data/Appendix_A1_2_and_A5_8/lobby_lda/lob_lobbyist.txt")
#prepare / assemble data
t8_2<-lapply(t8,function(x) strsplit(x,"\\|"))
t8_2l<-unlist(lapply(t8_2,function(x) length(x[[1]])))
#removing records , likely to be incomplete
t8r1<-t8_2[which(t8_2l==16)]
t8r1m<-as.data.frame(matrix(unlist(t8r1),ncol=16,byrow=T),stringsAsFactors = F)
t8c<-t8r1m[,seq(2,16,2)]
colnames(t8c)<-c("Uniqid","Lobbyist_raw","Lobbyist","Lobbyist_id","Year","OfficialPosition","CID","Formercongmem")

#load file
t7<-readLines("Data/Appendix_A1_2_and_A5_8/lobby_lda/lob_lobbying.txt")
#prepare / assemble data
t7_2<-lapply(t7,function(x) strsplit(x,"\\|"))
t7_2l<-unlist(lapply(t7_2,function(x) length(x[[1]])))
#not including records likely to be incomplete
t7r1<-t7_2[which(t7_2l==32)]
t7r2<-t7_2[which(t7_2l==34)]
t7r1m<-as.data.frame(matrix(unlist(t7r1),ncol=32,byrow=T),stringsAsFactors = F)
t7r2m<-as.data.frame(matrix(unlist(t7r2),ncol=34,byrow=T),stringsAsFactors = F)
t7r2m2<-t7r2m[,-c(7,9)]
colnames(t7r1m)<-paste("V",seq(1,ncol(t7r1m),1),sep="")
colnames(t7r2m2)<-paste("V",seq(1,ncol(t7r2m2),1),sep="")
t7c<-rbind(t7r1m,t7r2m2)
t7c<-t7c[,c(2,4,6,7,seq(8,12,2),13,seq(14,32,2))]
colnames(t7c)<-c("Uniqid", "Registrant_raw", "Registrant", "Isfirm", "Client_raw", "Client", "Ultorg", "Amount", "Catcod", "Source", "Self", "IncludeNSFS",
                 "Use","Ind","Year","Type","Typelong","Affiliate")
###

#filter to avoid unamended/duplicate amounts
t7c<-filter(t7c,Use!="n")
#get numeric variable for amounts
t7c$amount2<-as.numeric(trimws(gsub("\\,","",t7c$Amount)))
#indicator for lobbying firm / contract lobbying
t7c$contract1<-as.numeric(grepl("y",t7c$Isfirm)==T)


#get CRP (Center for Responsive Politics) organization category codings from OpenSecrets (https://www.opensecrets.org/downloads/crp/CRP_Categories.txt)
#in organized table
crp3<-readRDS("Data/Appendix_A1_2_and_A5_8/CRP_Categories.rds")

#merge sector category information with client data
t7c2<-merge(t7c,crp3,by=c("Catcod"),all.x=T)

#combine client data with lobbyist data
t78c<-merge(t7c2,t8c[,c("Uniqid","Lobbyist","Lobbyist_id","Year")],all=T,by=c("Uniqid","Year"))
#filter out observations without information about lobbyists
t78c2<-t78c[which(is.na(t78c$contract1)==F&is.na(t78c$Lobbyist)==F&is.na(t78c$Lobbyist_id)==F),]
#indicator variable for observations
t78c2$count<-1

#calculate values for federal level in 2017 
t78c2a2 <-t78c2 %>% 
  filter(Year==2017) %>% 
  summarise(.groups="keep",sum_count=sum(count),sum_count=sum(count[contract1==0]),
            pr_con=sum(count[contract1==1])/sum(count),n_con=length(unique(Lobbyist_id[contract1==1])),n=length(unique(Lobbyist_id)),
            pr_con_unique=length(unique(Lobbyist_id[contract1==1]))/length(unique(Lobbyist_id)))

#load state-level lobbying registrations, downloaded from FTM websites: 
#https://www.followthemoney.org/tools/lobbyist-link?s=AL&y=2017, 
#https://www.followthemoney.org/tools/lobbyist-link?s=AK&y=2017, 
# etc.
dl<-lapply(list.files("Data/Appendix_A1_2_and_A5_8/Lobbyist_registrations/2017",full.names = T), function(x) read.csv(x,stringsAsFactors = F,header=F))
#assemble into data set
dl2<-do.call(rbind,dl)
colnames(dl2)<-c("STATE","YEAR","LOBBYIST","N_CLIENTS")
#estimate contract lobbyist by whether or not lobbyist has more than one client
dl2$contract_proxy<-as.numeric(dl2$N_CLIENTS>1)

#aggregate by state and year (2017): proportion of contract lobbyists and contract-lobbying relations
dl2$count<-1
dl2a<-dl2 %>% group_by(STATE,YEAR) %>%
  summarise(.groups="keep",pr_contract=sum(count[contract_proxy==1])/sum(count),pr_contract_rel=sum(N_CLIENTS[contract_proxy==1])/sum(N_CLIENTS))

write.table(data.frame(Description=c("","Results:")),row.names=F,col.names=F,quote=F)
write.table(data.frame(Description=c("","Figure A5: Prevalence of Contract Lobbyists in Federal and State Lobbying")),row.names=F,col.names=F,quote=F)

png("output/PSRM_figA5.png",width=10,height=4,units="in", res=300)
  par(mfrow=c(1,2))
  #Panel 1
  hist(xlab=c("Proportion"),main=c("(1) Proportion of Client-Lobbyist Relationships:\nContract Lobbyists"),dl2a$pr_contract_rel,xlim=c(0,1))
  abline(v=c(t78c2a2$pr_con,dl2a$pr_contract_rel[dl2a$STATE=="WISCONSIN"]),lty=c(1,2))
  legend("topleft",legend=c("Wisc.","Federal"),lty=c(2,1))
  #Panel 2
  hist(xlab=c("Proportion"),main=c("(2) Proportion of Lobbyists:\nContract Lobbyists"),dl2a$pr_contract,xlim=c(0,1))
  abline(v=c(t78c2a2$pr_con_unique,dl2a$pr_contract[dl2a$STATE=="WISCONSIN"]),lty=c(1,2))
  legend("topright",legend=c("Wisc.","Federal"),lty=c(2,1))
dev.off()

write.table(data.frame(Description=c("(N.B. The file for this figure is saved as 'PSRM_figA5.png' in the 'output' folder.)")),row.names=F,col.names=F,quote=F)

#Include code again so that Figure A5 also shown in R if code file is run using source()
par(mfrow=c(1,2))
#Panel 1
hist(xlab=c("Proportion"),main=c("(1) Proportion of Client-Lobbyist Relationships:\nContract Lobbyists"),dl2a$pr_contract_rel,xlim=c(0,1))
abline(v=c(t78c2a2$pr_con,dl2a$pr_contract_rel[dl2a$STATE=="WISCONSIN"]),lty=c(1,2))
legend("topleft",legend=c("Wisc.","Federal"),lty=c(2,1))
#Panel 2
hist(xlab=c("Proportion"),main=c("(2) Proportion of Lobbyists:\nContract Lobbyists"),dl2a$pr_contract,xlim=c(0,1))
abline(v=c(t78c2a2$pr_con_unique,dl2a$pr_contract[dl2a$STATE=="WISCONSIN"]),lty=c(1,2))
legend("topright",legend=c("Wisc.","Federal"),lty=c(2,1))


write.table(data.frame(Description=c("","Median proportion of client-lobbyist relationships that include a contract lobbyist across states for 2017:")),row.names=F,col.names=F,quote=F)
print(round(median(dl2a$pr_contract_rel),2)) #0.74
write.table(data.frame(Description=c("","Wisconsin's proportion of client-lobbyist relationships that include a contract lobbyist for 2017:")),row.names=F,col.names=F,quote=F)
print(round(median(summary(dl2a$pr_contract_rel[dl2a$STATE=="WISCONSIN"])),2)) #0.69
write.table(data.frame(Description=c("","Federal level (LDA) proportion of client-lobbyist relationships that include a contract lobbyist for 2017:")),row.names=F,col.names=F,quote=F)
print(round(t78c2a2$pr_con,2)) #0.83

write.table(data.frame(Description=c("","Median proportion of unique lobbyists estimated to be contract lobbyist, across states for 2017:")),row.names=F,col.names=F,quote=F)
print(round(median(dl2a$pr_contract),2)) #0.23
write.table(data.frame(Description=c("","Proportion of unique lobbyists estimated to be contract lobbyist in Wisconsin for 2017:")),row.names=F,col.names=F,quote=F)
print(round(median(dl2a$pr_contract[dl2a$STATE=="WISCONSIN"]),2)) #0.17
write.table(data.frame(Description=c("","Federal level (LDA) proportion of unique lobbyists that are contract lobbyist for 2017:")),row.names=F,col.names=F,quote=F)
print(round(median(t78c2a2$pr_con_unique),2)) #0.42

#aggregate LDA data by sector 
t78c2a1s <-t78c2 %>% 
  filter(Year<2021) %>%
  group_by(Sector) %>% 
  summarise(.groups="keep",sum_count=sum(count),pr_con=sum(count[contract1==1])/sum(count),
            pr_con_unique=length(unique(Lobbyist_id[contract1==1]))/length(unique(Lobbyist_id)))

#remove observations without sector info
t78c2a1sf<-t78c2a1s[which(is.na(t78c2a1s$Sector)==F),]
#round proportion of client-lobbyist relationships classified as contract lobbyist
t78c2a1sf$pr_con<-round(t78c2a1sf$pr_con,2)
t78c2a1sf<-t78c2a1sf[order(t78c2a1sf$pr_con,decreasing = T),]

#Results: Table A4
write.table(data.frame(Description=c("","Table A4: Proportion of Client-Contract Lobbyist Relationships by Sector"))
            ,row.names=F,col.names=F,quote=F)
colnames(t78c2a1sf)[which(colnames(t78c2a1sf)=="pr_con")]<-c("Proportion With Contract Lobbyist")
print(kbl(t78c2a1sf[,c(1,3)],row.names=F,format="latex")) 
print(kbl(t78c2a1sf[,c(1,3)],row.names=F,format="html")) 
write(kbl(t78c2a1sf[,c(1,3)],row.names=F,format="latex"),"output/PSRM_tabA4.tex")
write.table(data.frame(Description=c("(N.B. The file for this table is saved as 'PSRM_tabA4.tex' in the 'output' folder.)")),row.names=F,col.names=F,quote=F)

# \begin{tabular}[t]{l|r}
# \hline
# Sector & Proportion With Contract Lobbyist\\
# \hline
# Unknown & 0.96\\
# \hline
# Lawyers \& Lobbyists & 0.91\\
# \hline
# Other & 0.89\\
# \hline
# Defense & 0.87\\
# \hline
# Communications/Electronics & 0.85\\
# \hline
# Transportation & 0.85\\
# \hline
# Energy \& Natural Resources & 0.84\\
# \hline
# Misc Business & 0.84\\
# \hline
# Health & 0.83\\
# \hline
# Finance/Insur/RealEst & 0.82\\
# \hline
# Construction & 0.79\\
# \hline
# Agribusiness & 0.77\\
# \hline
# Ideological/Single-Issue & 0.61\\
# \hline
# Labor & 0.46\\
# \hline
# \end{tabular}


